## setup libraries
library(tidyverse)
library(ggthemes)
library(ggmap)
library(rnaturalearth)
library(tmaptools)
library(sf)

## Load the precinct-level (Stimmbezirk) results for 2017.
## read_csv2() reads semicolon-separated files with a decimal comma.
stimmbezirke <- read_csv2(file = "stimmbezirke-2017.csv")

## Some precincts report votes but no registered voters; these are probably
## pseudo precincts for postal votes. Keep only rows with Bezirksart == 0
## (presumably the regular polling-station precincts -- confirm against the
## codebook) and only the variables used further down.
urnen <- stimmbezirke %>%
  filter(Bezirksart == 0) %>%
  select(
    `Wahlberechtigte (A)`, `DIE LINKE`, AfD,
    Wahlkreis, Land, Regierungsbezirk, Kreis,
    Verbandsgemeinde, Gemeinde, Wahlbezirk
  )

## Number of precincts left after the filter
zahl.urnen <- nrow(urnen)


## Very small precincts (fewer than 100 eligible voters) are dropped.
## First give the eligible-voters column a short name.
urnen <- rename(urnen, wb = `Wahlberechtigte (A)`)
## Number of small precincts, and their percentage among all precincts
zahl.kleine.wahlbezirke <- sum(urnen$wb < 100)
prozent.kleine.wahlbezirke <- zahl.kleine.wahlbezirke / nrow(urnen) * 100
## Keep only precincts with more than 99 eligible voters
urnen <- filter(urnen, wb > 99)


## Shares for AfD and Left in percent. The denominator is the number of
## eligible voters (wb), not the number of votes cast.
urnen <- urnen %>%
  mutate(
    afd.prozent = AfD / wb * 100,
    linke.prozent = `DIE LINKE` / wb * 100
  )

## State as a labelled factor. The numeric state codes 1-16 are mapped to
## their abbreviations explicitly, so the labels depend neither on how the
## codes happen to sort nor on all 16 states being present in the data
## (assigning levels() positionally mislabels or fails in those cases).
## as.character() comes first so that codes stored as 1 or as "01" both work.
## NOTE(review): assumes Land holds the state codes 1-16 -- confirm.
urnen$land.factor <- factor(
  as.integer(as.character(urnen$Land)),
  levels = 1:16,
  labels = c(
    "SH", "HH", "NI", "HB", "NW", "HE", "RP", "BW",
    "BY", "SL", "BE", "BB", "MV", "SN", "ST", "TH"
  )
)

## Region (east vs west) derived from the state. Berlin (position 11) is
## treated as eastern, so every state from position 11 onwards is "Ost".
## Built directly as a factor with both levels declared, which also works
## when one of the two regions has no rows.
urnen$ost <- factor(
  as.integer(urnen$land.factor) > 10,
  levels = c(FALSE, TRUE),
  labels = c("West", "Ost")
)

## Rebuild the Regionalschlüssel (a key that identifies municipalities within
## districts, states etc.) by pasting the component codes together in order:
## state, Regierungsbezirk, Kreis, Verbandsgemeinde, Gemeinde.
## NOTE(review): if these columns were parsed as numbers, leading zeros are
## not restored here and keys may be ambiguous -- confirm the column types.
urnen$rgs <- with(
  urnen,
  paste0(
    Land, as.character(Regierungsbezirk), Kreis,
    Verbandsgemeinde, Gemeinde
  )
)

## Reshape to long format for plotting: one row per precinct and party, with
## the party in `Partei` and its share in `Prozent`.
##
## Each precinct's AfD share is additionally carried through the reshape as
## an id column (afd.sort), so that every long row keeps the AfD share of its
## own precinct. gather() stacks all AfD rows first and all Left rows second;
## filling the column afterwards with rep(..., each = 2) assumed interleaved
## rows and attached the wrong precinct's AfD share to almost every row.
longurnen <- urnen %>%
  select(land.factor, ost, rgs, afd.prozent, linke.prozent) %>%
  mutate(afd.sort = afd.prozent) %>%
  gather(key = "Partei", value = "Prozent", afd.prozent, linke.prozent)

## Readable party labels. fixed() matches the column names literally; as a
## regular expression the "." would match any character.
longurnen$Partei <- str_replace(longurnen$Partei, fixed("afd.prozent"), "AfD")
longurnen$Partei <- str_replace(longurnen$Partei, fixed("linke.prozent"), "Linke")

## The per-precinct AfD share is the sort key used to order the states in the
## plot. Restore the column name and column order used previously.
longurnen <- longurnen %>%
  rename(afd.prozent = afd.sort) %>%
  select(land.factor, ost, rgs, Partei, Prozent, afd.prozent)

## AfD shares as a plain vector; kept because code outside this section may
## still refer to it.
afdresults <- longurnen[longurnen$Partei == "AfD", ]$Prozent

## Boxplots of precinct-level shares by state, one box per party.
## States are ordered along the x axis by their median AfD share and split
## into a West and an Ost panel; free x scales and free panel widths let each
## panel show only its own states.
## The facet variable is given as vars(ost) so that it is looked up in the
## plot data rather than read from the global data frame.
ggplot(
  data = longurnen,
  aes(
    x = reorder(land.factor, afd.prozent, FUN = median),
    y = Prozent,
    fill = Partei
  )
) +
  geom_boxplot(notch = FALSE) +
  theme_tufte() +
  facet_grid(cols = vars(ost), scales = "free_x", space = "free_x") +
  scale_fill_brewer(palette = "Greys", type = "qual") +
  xlab("") +
  ylab("Prozent (Wahlberechtigte)") +
  theme(axis.line = element_line()) +
  theme(panel.spacing = unit(3, "lines"))

## Write the most recent plot to a PDF in the figures directory.
## `basedirfigures` is not defined in this section and must already exist;
## it is pasted directly in front of the file name, so it presumably ends
## with a path separator -- confirm where it is set.
ggsave(
  filename = paste0(basedirfigures, "boxplot-wahlbezirke-2017.pdf"),
  width = 6,
  height = 4
)

## Where are the AfD strongholds? Take the 100 precincts with the highest
## AfD share (top_n() keeps ties, so there can be more than 100 rows),
## sorted by ascending share, and tabulate them by state.
afd.top.100 <- urnen %>%
  top_n(n = 100, wt = afd.prozent) %>%
  select(afd.prozent, rgs, Land, Wahlkreis) %>%
  arrange(afd.prozent)
table(afd.top.100$Land)
## Saxony

## The same for the Left: where are its strongholds?
linke.top.100 <- urnen %>%
  top_n(n = 100, wt = linke.prozent) %>%
  select(linke.prozent, rgs, Land, Wahlkreis) %>%
  arrange(linke.prozent)
table(linke.top.100$Land)
## More variation, stronger in the city states
